Conversation
e98aadd to
8022743
Compare
8022743 to
598f448
Compare
598f448 to
08e8a93
Compare
There was a problem hiding this comment.
Cursor Bugbot has reviewed your changes and found 3 potential issues.
Autofix Details
Bugbot Autofix prepared fixes for all 3 issues found in the latest run.
- ✅ Fixed: Benchmark script uses removed async-with stream pattern
- Updated
`stream_turn` to iterate directly over `provider.chat(...)` with `async for`, matching the stream interface that only implements `__aiter__`.
- Updated
- ✅ Fixed: Message identity only considers first tool call
- Message identities now include all assistant tool calls and the Realtime diff path expands multi-tool-call assistant messages into per-call items so no tool calls are dropped.
- ✅ Fixed: Double normalization of tools in LlmAgent+LlmProvider pipeline
- Added a fast-path tool resolver in
`LlmProvider` that reuses already-normalized `FunctionTool` inputs and only calls `_normalize_tools` when needed.
- Added a fast-path tool resolver in
Or push these changes by commenting:
@cursor push 471fcca35d
Preview (471fcca35d)
diff --git a/line/llm_agent/provider.py b/line/llm_agent/provider.py
--- a/line/llm_agent/provider.py
+++ b/line/llm_agent/provider.py
@@ -14,7 +14,7 @@
from typing import Any, List, Optional, Protocol, Tuple, runtime_checkable
from line.llm_agent.config import LlmConfig, _normalize_config
-from line.llm_agent.tools.utils import _normalize_tools
+from line.llm_agent.tools.utils import FunctionTool, _normalize_tools
@dataclass
@@ -105,9 +105,8 @@
):
self._model = model
normalized_config = _normalize_config(config or LlmConfig())
- normalized_tools, _ = _normalize_tools(tools, model=model) if tools else (None, None)
self._config = normalized_config
- self._tools = normalized_tools or []
+ self._tools = _resolve_tools(tools, model=model)
use_realtime = backend == "realtime" or (backend is None and _is_realtime_model(model))
use_websocket = backend == "websocket" or (backend is None and _is_websocket_model(model))
@@ -140,7 +139,7 @@
def chat(self, messages, tools=None, config=None, **kwargs):
cfg = _normalize_config(config) if config else self._config
- effective_tools = _normalize_tools(tools, model=self._model)[0] if tools else self._tools
+ effective_tools = _resolve_tools(tools, model=self._model) if tools else self._tools
return self._backend.chat(messages, effective_tools, config=cfg, **kwargs)
async def warmup(self, config=None):
@@ -199,18 +198,29 @@
return lower.startswith("gpt-5.2") or lower.startswith("gpt5.2")
+def _resolve_tools(tools: Optional[List[Any]], model: str) -> List[FunctionTool]:
+ """Resolve tools to FunctionTools, avoiding no-op re-normalization."""
+ if not tools:
+ return []
+ if all(isinstance(tool, FunctionTool) for tool in tools):
+ return list(tools)
+ return _normalize_tools(tools, model=model)[0]
+
+
def _message_identity(msg: Message) -> tuple:
"""Compute an identity fingerprint for a single Message.
Used by both WebSocket providers for divergence detection / diff-sync.
- For assistant messages with tool calls, identity is derived from the
- *first* tool call (mirrors how the server tracks multi-tool-call turns
- as a single logical unit).
+ For assistant messages with tool calls, identity includes all tool calls
+ so divergence checks detect changes to any call in the turn.
"""
if msg.tool_calls:
- tc = msg.tool_calls[0]
- return ("assistant_tool_call", tc.name, tc.arguments, tc.id)
+ if len(msg.tool_calls) == 1:
+ tc = msg.tool_calls[0]
+ return ("assistant_tool_call", tc.name, tc.arguments, tc.id)
+ tool_calls_key = tuple((tc.name, tc.arguments, tc.id) for tc in msg.tool_calls)
+ return ("assistant_tool_calls", tool_calls_key)
return (msg.role, msg.content or "", msg.tool_call_id or "", msg.name or "")
diff --git a/line/llm_agent/realtime_provider.py b/line/llm_agent/realtime_provider.py
--- a/line/llm_agent/realtime_provider.py
+++ b/line/llm_agent/realtime_provider.py
@@ -399,11 +399,8 @@
def _message_to_item(msg: Message) -> Dict[str, Any]:
"""Convert a Message to a Realtime API conversation item dict.
- Note: for assistant messages with multiple tool calls, only the first
- tool call is converted. The Realtime API represents each tool call as a
- separate conversation item, but the diff algorithm tracks identity at the
- message level. Handling multi-tool-call expansion here would require
- reworking the diff model.
+ Assistant tool-call messages must contain exactly one tool call; callers
+ are responsible for expanding multi-tool-call turns into separate messages.
"""
if msg.role == "user":
return {
@@ -414,13 +411,8 @@
if msg.role == "assistant":
if msg.tool_calls:
- if len(msg.tool_calls) > 1:
- logger.warning(
- "Realtime API: assistant message has %d tool calls but only "
- "the first is converted (dropping %s)",
- len(msg.tool_calls),
- [tc.name for tc in msg.tool_calls[1:]],
- )
+ if len(msg.tool_calls) != 1:
+ raise ValueError("Assistant tool-call message must contain exactly one tool call")
tc = msg.tool_calls[0]
return {
"type": "function_call",
@@ -464,7 +456,19 @@
if msg.role == "system":
system_parts.append(msg.content or "")
else:
- non_system.append(msg)
+ if msg.role == "assistant" and msg.tool_calls and len(msg.tool_calls) > 1:
+ for tc in msg.tool_calls:
+ non_system.append(
+ Message(
+ role="assistant",
+ content=msg.content,
+ tool_calls=[tc],
+ tool_call_id=msg.tool_call_id,
+ name=msg.name,
+ )
+ )
+ else:
+ non_system.append(msg)
desired_instructions = "\n\n".join(system_parts) if system_parts else None
diff --git a/line/llm_agent/scripts/bench_latency.py b/line/llm_agent/scripts/bench_latency.py
--- a/line/llm_agent/scripts/bench_latency.py
+++ b/line/llm_agent/scripts/bench_latency.py
@@ -164,12 +164,11 @@
ttft = None
text_parts: list[str] = []
- async with provider.chat(messages, config=config) as stream:
- async for chunk in stream:
- if chunk.text:
- if ttft is None:
- ttft = (time.perf_counter() - t0) * 1000
- text_parts.append(chunk.text)
+ async for chunk in provider.chat(messages, config=config):
+ if chunk.text:
+ if ttft is None:
+ ttft = (time.perf_counter() - t0) * 1000
+ text_parts.append(chunk.text)
total = (time.perf_counter() - t0) * 1000
return TurnResult(
diff --git a/line/llm_agent/websocket_provider.py b/line/llm_agent/websocket_provider.py
--- a/line/llm_agent/websocket_provider.py
+++ b/line/llm_agent/websocket_provider.py
@@ -447,19 +447,26 @@
def _extract_model_output_identity(response: Dict[str, Any]) -> Optional[tuple]:
"""Derive a single message-level identity from a Responses API output.
- Mirrors ``_message_identity``: if the model produced tool calls we key
- on the first one; otherwise we key on the full text.
+ Mirrors ``_message_identity``: single-tool-call outputs use a compact key,
+ while multi-tool-call outputs include every call in order.
"""
output_items = response.get("output", [])
function_calls = [i for i in output_items if i.get("type") == "function_call"]
if function_calls:
- fc = function_calls[0]
+ if len(function_calls) == 1:
+ fc = function_calls[0]
+ return (
+ "assistant_tool_call",
+ fc.get("name", ""),
+ fc.get("arguments", ""),
+ fc.get("call_id", ""),
+ )
return (
- "assistant_tool_call",
- fc.get("name", ""),
- fc.get("arguments", ""),
- fc.get("call_id", ""),
+ "assistant_tool_calls",
+ tuple(
+ (fc.get("name", ""), fc.get("arguments", ""), fc.get("call_id", "")) for fc in function_calls
+ ),
)
# Concatenate text across all message output items.
08e8a93 to
e97ad49
Compare
e97ad49 to
dc4c1a9
Compare
dc4c1a9 to
67069c3
Compare
149738e to
7a2463b
Compare
3a53537 to
a8f738f
Compare
There was a problem hiding this comment.
Cursor Bugbot has reviewed your changes and found 1 potential issue.
Autofix Details
Bugbot Autofix prepared a fix for the issue found in the latest run.
- ✅ Fixed: x
- x
Or push these changes by commenting:
@cursor push d109cf302a
Preview (d109cf302a)
diff --git a/line/llm_agent/llm_agent.py b/line/llm_agent/llm_agent.py
--- a/line/llm_agent/llm_agent.py
+++ b/line/llm_agent/llm_agent.py
@@ -311,7 +311,7 @@
stream = self._llm.chat(
messages,
- tools or None,
+ tools,
config=config,
**chat_kwargs,
)
I will take another commit-by-commit pass because this is pretty large surface area.
4e0603c to
3a00fba
Compare
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
3.9 requires async primitives (lock/queues) to be initialized inside the context of a loop. The loop isn't available at initialization time. Fix: lazy initialize
3a00fba to
c571e05
Compare
There was a problem hiding this comment.
Cursor Bugbot has reviewed your changes and found 2 potential issues.
Bugbot Autofix is OFF. To automatically fix reported issues with cloud agents, enable autofix in the Cursor dashboard.
# Lock ownership transfers to the stream — released in __aexit__
def on_response_done(response: Dict[str, Any]) -> None:
    for item in response.get("output", []):
        _track_output_item(self._history, item)
There was a problem hiding this comment.
Realtime provider drain corrupts history on cancellation
High Severity
The on_response_done callback in _RealtimeProvider._setup_chat unconditionally appends output items from the response to _history, regardless of response status. When a stream is cancelled (e.g., barge-in), _cancel_and_drain calls stream.drain() which invokes _on_response_done with the cancelled response data. Partial output items from the cancelled response get tracked in _history, corrupting diff-sync state for subsequent chat() calls. The _WebSocketProvider correctly guards against this by checking response.get("status") == "completed" before calling _finalize_response.
Additional Locations (1)
async def _connect(self) -> None:
    """Open a new WS connection and wait for session.created."""
    url = f"{WS_URL}?model={self._model}"
There was a problem hiding this comment.
Realtime provider doesn't strip openai/ model prefix
Medium Severity
_RealtimeProvider._connect uses self._model directly in the WebSocket URL (f"{WS_URL}?model={self._model}") without stripping the openai/ prefix. The _is_realtime_model detection function accepts LiteLLM-style names like "openai/gpt-4o-realtime-preview", but the raw name gets embedded in the URL, causing an API error. The _WebSocketProvider has _normalize_openai_model_name for exactly this purpose but the Realtime provider doesn't use it.



What does this PR do?
WebSocket APIs are noticeably faster for certain models. Most notably,
`gpt-realtime-1.5` and `gpt-5.2`. Unfortunately:
~~Fortunately, it's straightforward~~ It's not super straightforward to add support for both, but I've done it. We hide the choice of implementation behind the facade of
`LlmProvider`, so it's seamless from the developer PoV. This is a pretty substantial PR, so I've split it into individual commits:
Type of change
Testing
Unit tests + "real" provider tests
Checklist
`make format`
Note
High Risk
Large refactor of core LLM streaming/provider plumbing and introduces persistent WebSocket protocols with new routing/fallback logic; issues here can impact all agent responses, tool calling, and turn lifecycle behavior.
Overview
Adds first-class WebSocket-based LLM backends for OpenAI: a Realtime provider with diff-sync
(`realtime_provider.py`) and a Responses-API WebSocket provider with continuation/divergence handling (`websocket_provider.py`), plus shared WS stream utilities (`stream.py`). Refactors
`provider.py` into a `LlmProvider` facade that normalizes config/tools, selects HTTP vs WS backends (including HTTP fallback when WS can't honor certain `LlmConfig` fields), and centralizes model capability detection (`_get_model_config`). `LlmAgent` is updated to use the facade, warm providers on `CallStarted`, lazily initialize asyncio primitives for Python 3.9, and move tool merging/resolution into `tools/utils.py` (including native vs fallback `web_search` behavior). Updates examples/tests/scripts to the new
`LlmProvider` API and streaming semantics (no `async with`), adds latency benchmarking and provider test scripts, and expands unit tests to cover backend routing, warmup/tool propagation, model validation, and WS/realtime-specific behaviors. Written by Cursor Bugbot for commit c571e05. This will update automatically on new commits. Configure here.